“Baby Mental Life: Study 1” was conducted on MTurk on 2018-07-31.
Our planned sample was 300 participants, and we anticipated that roughly 85-90% of recruited participants would pass all of our attention checks, so we initially recruited 342 participants. After filtering out participants who failed at least one of our attention checks, we ended up retaining only XX participants, so we recruited an additional XX participants. In the end, we ended up with a sample of XX participants who passed our attention checks (out of XX participants recruited in total).
Each participant assessed children’s mental capacities at 3 target ages: newborns, 9-month-olds, and 5-year-olds. For each target, they rated 60 mental capacities on a scale from 0 (not at all capable) to 100 (completely capable).
For more details about the study, see our preregistration here.
# load required libraries
library(tidyverse)
[37m── [1mAttaching packages[22m ────────────────────────────────────────────── tidyverse 1.2.1 ──[39m
[37m[32m✔[37m [34mggplot2[37m 3.0.0 [32m✔[37m [34mpurrr [37m 0.2.5
[32m✔[37m [34mtibble [37m 1.4.2 [32m✔[37m [34mdplyr [37m 0.7.6
[32m✔[37m [34mtidyr [37m 0.8.1 [32m✔[37m [34mstringr[37m 1.3.1
[32m✔[37m [34mreadr [37m 1.1.1 [32m✔[37m [34mforcats[37m 0.3.0[39m
[37m── [1mConflicts[22m ───────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[37m [34mdplyr[37m::[32mfilter()[37m masks [34mstats[37m::filter()
[31m✖[37m [34mdplyr[37m::[32mlag()[37m masks [34mstats[37m::lag()[39m
library(langcog) # source: https://github.com/langcog/langcog-package
Attaching package: ‘langcog’
The following object is masked from ‘package:base’:
scale
library(psych)
Attaching package: ‘psych’
The following objects are masked from ‘package:ggplot2’:
%+%, alpha
# set theme for ggplots
theme_set(theme_bw())
# run source code (extra functions)
source("./scripts/max_factors_efa.R")
# make function for implementing criteria
reten_fun <- function(df, rot_type = c("oblimin", "varimax", "none")){
# figure out max number of factors to retain
n_var <- length(names(df))
max_k <- max_ok(n_var)
# run efa with max factors, unrotated
fa_unrot <- fa(df, nfactors = max_k, rotate = "none")
eigen <- fa_unrot$Vaccounted %>%
data.frame() %>%
rownames_to_column("param") %>%
gather(factor, value, -param) %>%
spread(param, value) %>%
filter(`SS loadings` > 1, `Proportion Explained` > 0.05)
retain_k <- nrow(eigen)
fa_rot <- fa(df, nfactors = retain_k, rotate = rot_type)
loadings <- fa_rot$loadings[] %>%
data.frame() %>%
rownames_to_column("capacity") %>%
gather(factor, loading, -capacity) %>%
group_by(capacity) %>%
top_n(1, abs(loading)) %>%
ungroup() %>%
count(factor)
retain_k_final <- nrow(loadings)
return(retain_k_final)
}
# make function for generating heatmap
heatmap_fun <- function(efa){
# get factor loadings
loadings <- efa$loadings[] %>%
data.frame() %>%
rownames_to_column("capacity") %>%
gather(factor, loading, -capacity)
# get fa.sort() order
order <- loadings %>%
group_by(capacity) %>%
top_n(1, abs(loading)) %>%
ungroup() %>%
arrange(factor, abs(loading)) %>%
mutate(order = 1:length(levels(factor(loadings$capacity)))) %>%
select(capacity, order)
# make plot
plot <- ggplot(loadings %>%
left_join(order) %>%
mutate(capacity = gsub("_", " ", capacity)),
aes(x = factor,
y = reorder(capacity, order),
fill = loading,
label = format(round(loading, 2), nsmall = 2))) +
geom_tile(color = "black") +
geom_text(size = 3) +
scale_fill_distiller(limits = c(-1, 1),
palette = "RdYlBu",
guide = guide_colorbar(barheight = 20)) +
theme_minimal() +
scale_x_discrete(position = "top") +
labs(x = "", y = "")
return(plot)
}
# make function for plotting factor scores by factor, target
scoresplot_fun <- function(efa,
target = c("all", "newborns", "9-month-olds", "5-year-olds")){
# generate list of targets
target_list <- case_when(
target == "all" ~ c("target00mo", "target09mo", "target60mo"),
target == "newborns" ~ "target00mo",
target == "9-month-olds" ~ "target09mo",
target == "5-year-olds" ~ "target60mo"
)
# make usable dataframe
df <- efa$scores[] %>%
data.frame() %>%
rownames_to_column("subid") %>%
mutate(ResponseId = gsub("_target.*$", "", subid),
target = gsub("R_.*_", "", subid)) %>%
filter(target %in% target_list) %>%
select(-subid) %>%
gather(factor, score, -c(ResponseId, target)) %>%
mutate(target = recode_factor(target,
"target00mo" = "newborns",
"target09mo" = "9-month-olds",
"target60mo" = "5-year-olds"))
# get bootstrapped means
df_boot <- df %>%
group_by(target, factor) %>%
multi_boot_standard("score", na.rm = T) %>%
ungroup()
# get first items for each factor
first_items <- efa$loadings[] %>%
data.frame() %>%
rownames_to_column("capacity") %>%
gather(factor, loading, -capacity) %>%
group_by(factor) %>%
top_n(3, abs(loading)) %>%
mutate(capacity = gsub("_", " ", capacity),
cap_list = str_c(capacity, collapse = ", "),
cap_list = paste0(cap_list, "...")) %>%
ungroup() %>%
select(-capacity, -loading) %>%
distinct()
# make plot
plot <- ggplot(df,
aes(x = target,
y = score,
color = factor)) +
facet_grid(~ factor) +
geom_path(aes(group = ResponseId), alpha = 0.1) +
geom_path(data = df_boot,
aes(y = mean, group = factor), color = "black", lty = 2) +
geom_pointrange(data = df_boot,
aes(y = mean, ymin = ci_lower, ymax = ci_upper),
color = "black", fatten = 0.75) +
geom_text(data = first_items,
aes(label = gsub('(.{1,30})(\\s|$)', '\\1\n', cap_list)),
x = 0.5, y = max(df$score), size = 3, color = "black",
hjust = 0, vjust = 1) +
theme_bw() +
labs(x = "target", y = "factor score",
subtitle = "Error bars are bootstrapped 95% confidence intervals") +
guides(color = "none")
# guides(color = guide_legend(override.aes = list(alpha = 1, size = 1)))
return(plot)
}
# make function for plotting individual item means by factor, target
itemsplot_fun <- function(efa,
target = c("all", "newborns", "9-month-olds", "5-year-olds")){
# generate list of targets
target_list <- case_when(
target == "all" ~ c("target00mo", "target09mo", "target60mo"),
target == "newborns" ~ "target00mo",
target == "9-month-olds" ~ "target09mo",
target == "5-year-olds" ~ "target60mo"
)
# make usable dataframe
df <- d_all %>%
rownames_to_column("subid") %>%
mutate(ResponseId = gsub("_target.*$", "", subid),
target = gsub("R_.*_", "", subid)) %>%
filter(target %in% target_list) %>%
select(-subid) %>%
gather(capacity, response, -c(ResponseId, target)) %>%
mutate(target = recode_factor(target,
"target00mo" = "newborns",
"target09mo" = "9-month-olds",
"target60mo" = "5-year-olds"))
# get factor loadings
loadings <- efa$loadings[] %>%
data.frame() %>%
rownames_to_column("capacity") %>%
gather(factor, loading, -capacity)
# get fa.sort() order
order <- efa$loadings[] %>%
data.frame() %>%
rownames_to_column("capacity") %>%
gather(factor, loading, -capacity) %>%
group_by(capacity) %>%
top_n(1, abs(loading)) %>%
ungroup() %>%
arrange(factor, abs(loading)) %>%
mutate(order = 1:length(levels(factor(loadings$capacity)))) %>%
select(factor, capacity, order)
# add order to df
df <- df %>% left_join(order)
# get bootstrapped means
df_boot <- df %>%
group_by(target, factor, capacity, order) %>%
multi_boot_standard("response", na.rm = T) %>%
ungroup() %>%
mutate(capacity = gsub("_", " ", capacity))
# make plot
plot <- ggplot(df %>%
left_join(order) %>%
mutate(capacity = gsub("_", " ", capacity)),
aes(x = response,
y = reorder(capacity, order),
color = factor)) +
facet_grid(factor ~ target, scales = "free", space = "free") +
geom_point(alpha = 0.02) +
geom_errorbarh(data = df_boot,
aes(xmin = ci_lower, xmax = ci_upper, x = NULL),
color = "black", height = 0) +
geom_point(data = df_boot,
aes(x = mean),
color = "black", size = 2) +
theme_bw() +
labs(x = "response", y = "",
subtitle = "Error bars are bootstrapped 95% confidence intervals") +
guides(color = "none")
# guides(color = guide_legend(override.aes = list(alpha = 1, size = 2)))
return(plot)
}
# load in raw data
d0 <- read.csv("./data/Baby mental life: Study 1_August 1, 2018_04.38.csv")
# make question key
question_key <- d0[1,] %>%
t() %>%
data.frame() %>%
rownames_to_column("question_qualtrics") %>%
rename("question_text" = X1) %>%
mutate(question = recode(question_qualtrics,
"Duration..in.seconds." = "Duration",
"Q2" = "Age",
"Q3" = "GenderSex",
"Q3_3_TEXT" = "GenderSex_fillIn",
"Q4" = "EnglishProf",
"Q5" = "FirstLang",
"Q5_2_TEXT" = "FirstLang_fillIn",
"Q18" = "RaceEthnicity",
"Q18_10_TEXT" = "RaceEthnicity_fillIn",
"Q19" = "Education",
"Q20" = "Income",
"Q21" = "MaritalStatus",
"Q21_6_TEXT" = "MaritalStatus_fillIn",
"Q22" = "HouseholdSize",
"Q23" = "Parent",
"Q25" = "ChildrenNumber",
"Q26" = "ChildrenYoungestAge",
"Q26_1_TEXT" = "ChildrenYoungestAge_fillIn1",
"Q26_2_TEXT" = "ChildrenYoungestAge_fillIn2",
"Q27" = "ChildrenOldestAge",
"Q27_1_TEXT" = "ChildrenOldestAge_fillIn1",
"Q27_2_TEXT" = "ChildrenOldestAge_fillIn2",
"Q28" = "Attention",
"Q29" = "Comments",
.default = question_qualtrics),
question = case_when(grepl("the following questions", question_text) ~
gsub("^.*extent is a ", "", question_text),
TRUE ~ question),
question = case_when(grepl("capable of...", question_text) ~
gsub("capable of... ", "", tolower(question)),
TRUE ~ question),
question = gsub(" ", "_", question),
question = gsub("'", "", question),
question = gsub("newborn_-_", "target00mo_", question),
question = gsub("9-month-old_-_", "target09mo_", question),
question = gsub("5-year-old_-_", "target60mo_", question)) %>%
mutate(question = gsub("-", "_", question),
question = gsub(" \\(for_example,_smooth,_rough\\)", "", question))
# rename questions
d1 <- d0 %>%
filter(grepl("R_", ResponseId)) %>% # get rid of extra info in first two rows
gather(question_qualtrics, response, -ResponseId) %>%
left_join(question_key %>% select(question_qualtrics, question)) %>%
select(-question_qualtrics) %>%
spread(question, response)
attributes are not identical across measure variables;
they will be droppedJoining, by = "question_qualtrics"
# implement inclusion/exclusion criteria
d2 <- d1 %>%
filter(Age >= 18, Age <= 45,
EnglishProf %in% c("Advanced", "Superior"),
`target00mo_please_select_34` == 34,
`target09mo_please_select_90` == 90,
`target60mo_please_select_4` == 4,
Attention == "Yes")
# recode variables & drop extraneous variables
d3 <- d2 %>%
select(-c(DistributionChannel, EndDate, ExternalReference, Finished, IPAddress,
starts_with("Location"), MTurkCode, payment, Progress,
starts_with("Recipient"), RecordedDate, StartDate, Status,
timeEstimate, UserLanguage)) %>%
mutate_at(vars(c(starts_with("target"), Age, ChildrenNumber,
ChildrenOldestAge_fillIn1, ChildrenOldestAge_fillIn2,
ChildrenYoungestAge_fillIn1, ChildrenYoungestAge_fillIn2,
Duration, HouseholdSize)),
funs(as.numeric(.))) %>%
mutate(Education = factor(Education,
levels = c("No schooling completed",
"Nursery school to 8th grade",
"Some high school, no diploma",
"High school graduate, diploma or equivalent (including GED)",
"Some college credit, no degree",
"Trade school, technical school, or vocational school",
"Associate's degree (for example, AA, AS)",
"Bachelor's degree (for example, BA, BS)",
"Master's degree (for example, MA, MS)",
"Doctor or professional degree (for example, PhD, JD, MD, MBA)")),
Income = factor(Income,
levels = c("$5,001 - 15,000",
"$15,001 - 30,000",
"$30,001 - 60,000",
"$60,001 - 90,000",
"$90,001 - 150,000",
"Greater than $150,000",
"Prefer not to say")),
Parent = factor(Parent,
levels = c("No", "Yes")))
NAs introduced by coercion
# make useful datasets
# final dataset with all measured variables
d <- d3 %>% distinct()
# demographic information
d_demo <- d %>%
select(ResponseId, Duration,
Age, starts_with("GenderSex"), starts_with("RaceEthnicity"),
starts_with("FirstLang"),
Education, Income, HouseholdSize,
starts_with("MaritalStatus"),
Parent, starts_with("Children"),
Comments) %>%
mutate(RaceEthnicity_collapse = ifelse(grepl(",([A-Za-z])", RaceEthnicity),
"Multiple", RaceEthnicity)) %>%
mutate(ChildrenOldestAge_collapse = case_when(
ChildrenOldestAge %in% c("My oldest child has not yet been born (I am/my partner is pregnant)", "My oldest child is deceased", "Prefer not to say") ~ ChildrenOldestAge,
ChildrenOldestAge == "In months:" ~
ifelse(as.numeric(ChildrenOldestAge_fillIn1)/12 < 1,
"< 1 year",
ifelse(as.numeric(ChildrenOldestAge_fillIn1)/12 < 3,
"1 - 3 years",
ifelse(as.numeric(ChildrenOldestAge_fillIn1)/12 < 5,
"3 - 5 years",
ifelse(as.numeric(ChildrenOldestAge_fillIn1)/12 < 10,
"5 - 10 years",
ifelse(as.numeric(ChildrenOldestAge_fillIn1)/12 < 18,
"10 - 18 years",
"> 18 years"))))),
ChildrenOldestAge == "In years:" ~
ifelse(as.numeric(ChildrenOldestAge_fillIn2) < 1,
"< 1 year",
ifelse(as.numeric(ChildrenOldestAge_fillIn2) < 3,
"1 - 3 years",
ifelse(as.numeric(ChildrenOldestAge_fillIn2) < 5,
"3 - 5 years",
ifelse(as.numeric(ChildrenOldestAge_fillIn2) < 10,
"5 - 10 years",
ifelse(as.numeric(ChildrenOldestAge_fillIn2) < 18,
"10 - 18 years",
"> 18 years"))))),
TRUE ~ "NA")) %>%
mutate(ChildrenOldestAge_collapse =
factor(ChildrenOldestAge_collapse,
levels = c("My oldest child has not yet been born (I am/my partner is pregnant)",
"< 1 year",
"1 - 3 years",
"3 - 5 years",
"5 - 10 years",
"10 - 18 years",
"> 18 years",
"My oldest child is deceased",
"Prefer not to say"))) %>%
mutate(ChildrenYoungestAge_collapse = case_when(
ChildrenYoungestAge %in% c("My youngest child has not yet been born (I am/my partner is pregnant)", "My youngest child is deceased", "Prefer not to say") ~ ChildrenYoungestAge,
ChildrenYoungestAge == "In months:" ~
ifelse(as.numeric(ChildrenYoungestAge_fillIn1)/12 < 1,
"< 1 year",
ifelse(as.numeric(ChildrenYoungestAge_fillIn1)/12 < 3,
"1 - 3 years",
ifelse(as.numeric(ChildrenYoungestAge_fillIn1)/12 < 5,
"3 - 5 years",
ifelse(as.numeric(ChildrenYoungestAge_fillIn1)/12 < 10,
"5 - 10 years",
ifelse(as.numeric(ChildrenYoungestAge_fillIn1)/12 < 18,
"10 - 18 years",
"> 18 years"))))),
ChildrenYoungestAge == "In years:" ~
ifelse(as.numeric(ChildrenYoungestAge_fillIn2) < 1,
"< 1 year",
ifelse(as.numeric(ChildrenYoungestAge_fillIn2) < 3,
"1 - 3 years",
ifelse(as.numeric(ChildrenYoungestAge_fillIn2) < 5,
"3 - 5 years",
ifelse(as.numeric(ChildrenYoungestAge_fillIn2) < 10,
"5 - 10 years",
ifelse(as.numeric(ChildrenYoungestAge_fillIn2) < 18,
"10 - 18 years",
"> 18 years"))))),
TRUE ~ "NA")) %>%
mutate(ChildrenYoungestAge_collapse =
factor(ChildrenYoungestAge_collapse,
levels = c("My Youngest child has not yet been born (I am/my partner is pregnant)",
"< 1 year",
"1 - 3 years",
"3 - 5 years",
"5 - 10 years",
"10 - 18 years",
"> 18 years",
"My Youngest child is deceased",
"Prefer not to say")))
# all assessments of ALL TARGETS, RepsonseId as rownames
d_all <- d %>%
select(ResponseId, starts_with("target"), -contains("please_select")) %>%
gather(question, response, -ResponseId) %>%
mutate(target = gsub("_.*$", "", question),
capacity = gsub("target..mo_", "", question),
subid = paste(ResponseId, target, sep = "_")) %>%
select(-ResponseId, -question, -target) %>%
spread(capacity, response) %>%
column_to_rownames("subid")
# all assessments of NEWBORNS, RepsonseId as rownames
d_00mo <- d_all %>%
rownames_to_column("subid") %>%
filter(grepl("target00mo", subid)) %>%
mutate(subid = gsub("target..mo_", "", subid)) %>%
column_to_rownames("subid")
# all assessments of 9-MONTH-OLDS, RepsonseId as rownames
d_09mo <- d_all %>%
rownames_to_column("subid") %>%
filter(grepl("target09mo", subid)) %>%
mutate(subid = gsub("target..mo_", "", subid)) %>%
column_to_rownames("subid")
# all assessments of 5-YEAR-OLDS, RepsonseId as rownames
d_60mo <- d_all %>%
rownames_to_column("subid") %>%
filter(grepl("target60mo", subid)) %>%
mutate(subid = gsub("target..mo_", "", subid)) %>%
column_to_rownames("subid")
Our primary analysis is an exploratory factor analysis (EFA) collapsing across all 3 target characters (and treating an individual participant’s responses to each character as if they were independent data points) - see the preregistration for more details.
We planned to examine three factor retention protocols in order to determine how many factors to retain: Parallel analysis, minimizing BIC, and a set of preset criteria outlined in Weisman et al. (2017). Here we look at each solution in turn.
reten_all_PA <- fa.parallel(d_all, plot = F); reten_all_PA
Parallel analysis suggests that the number of factors = 4 and the number of components = 3
Call: fa.parallel(x = d_all, plot = F)
Parallel analysis suggests that the number of factors = 4 and the number of components = 3
Eigen Values of
Original factors Simulated data Original components simulated data
1 33.72 0.59 34.13 1.57
2 7.02 0.52 7.57 1.51
3 1.17 0.49 1.66 1.48
4 0.91 0.46 1.35 1.45
reten_all_par <- reten_all_PA$nfact
efa_all_par <- fa(d_all, reten_all_par, "oblimin")
A loading greater than abs(1) was detected. Examine the loadings carefully.
heatmap_fun(efa_all_par) +
labs(title = "Parallel Analysis") #+
Joining, by = "capacity"
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 0.5, xmax = 1.5, ymin = 1.5, ymax = 21.5) +
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 1.5, xmax = 2.5, ymin = 25.5, ymax = 33.5) +
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 1.5, xmax = 2.5, ymin = 22.5, ymax = 24.5) +
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 2.5, xmax = 3.5, ymin = 33.5, ymax = 34.5) +
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 2.5, xmax = 3.5, ymin = 36.5, ymax = 54.5) +
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 3.5, xmax = 4.5, ymin = 54.5, ymax = 55.5) +
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 3.5, xmax = 4.5, ymin = 56.5, ymax = 57.5) +
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 3.5, xmax = 4.5, ymin = 58.5, ymax = 60.5)
scoresplot_fun(efa_all_par, target = "all") +
labs(title = "Parallel Analysis") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
itemsplot_fun(efa_all_par, target = "all") +
labs(title = "Parallel Analysis")
Joining, by = "capacity"
|====================================================== | 89% ~0 s remaining
|======================================================= | 92% ~0 s remaining
|======================================================== | 93% ~0 s remaining
|========================================================== | 96% ~0 s remaining
|============================================================ | 99% ~0 s remaining
Joining, by = c("capacity", "factor", "order")
reten_all_vss <- VSS(d_all, plot = F); reten_all_vss
Very Simple Structure
Call: vss(x = x, n = n, rotate = rotate, diagonal = diagonal, fm = fm,
n.obs = n.obs, plot = plot, title = title, use = use, cor = cor)
VSS complexity 1 achieves a maximimum of 0.95 with 1 factors
VSS complexity 2 achieves a maximimum of 0.99 with 2 factors
The Velicer MAP achieves a minimum of 0.01 with 6 factors
BIC achieves a minimum of -5999.22 with 6 factors
Sample Size adjusted BIC achieves a minimum of -1616.31 with 8 factors
Statistics by number of factors
vss1 vss2 map dof chisq prob sqresid fit RMSEA BIC SABIC complex eChisq
1 0.95 0.00 0.0653 1710 21267 0.0e+00 67.6 0.95 0.120 9797 15227 1.0 45047
2 0.59 0.99 0.0130 1651 8728 0.0e+00 10.3 0.99 0.074 -2347 2896 1.5 3270
3 0.45 0.84 0.0084 1593 6346 0.0e+00 7.6 0.99 0.062 -4340 719 2.0 1749
4 0.44 0.75 0.0059 1536 4809 0.0e+00 5.8 1.00 0.052 -5494 -617 2.3 882
5 0.43 0.76 0.0053 1480 4009 1.0e-231 5.2 1.00 0.047 -5919 -1219 2.3 632
6 0.43 0.75 0.0052 1425 3560 5.4e-183 4.8 1.00 0.044 -5999 -1474 2.4 502
7 0.43 0.75 0.0054 1371 3280 1.4e-157 4.5 1.00 0.042 -5916 -1563 2.4 433
8 0.43 0.74 0.0058 1318 3039 2.5e-137 4.3 1.00 0.041 -5802 -1616 2.5 377
SRMR eCRMS eBIC
1 0.125 0.127 33576
2 0.034 0.035 -7805
3 0.025 0.026 -8937
4 0.017 0.019 -9422
5 0.015 0.016 -9296
6 0.013 0.015 -9057
7 0.012 0.014 -8764
8 0.011 0.013 -8464
reten_all_bic <- data.frame(reten_all_vss$vss.stats %>%
rownames_to_column("nfactors") %>%
top_n(-1, BIC) %>%
select(nfactors))$nfactors %>% as.numeric()
efa_all_bic <- fa(d_all, reten_all_bic, "oblimin")
convergence not obtained in GPFoblq. 1000 iterations used.
heatmap_fun(efa_all_bic) +
labs(title = "Minimizing BIC") #+
Joining, by = "capacity"
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 0.5, xmax = 1.5, ymin = 0.5, ymax = 21.5) +
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 1.5, xmax = 2.5, ymin = 22.5, ymax = 32.5) +
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 2.5, xmax = 3.5, ymin = 32.5, ymax = 48.5) +
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 3.5, xmax = 4.5, ymin = 49.5, ymax = 50.5) +
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 3.5, xmax = 4.5, ymin = 51.5, ymax = 52.5) +
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 4.5, xmax = 5.5, ymin = 52.5, ymax = 54.5) +
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 4.5, xmax = 5.5, ymin = 56.5, ymax = 60.5)
scoresplot_fun(efa_all_bic, target = "all") +
labs(title = "Minimizing BIC") +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))
itemsplot_fun(efa_all_bic, target = "all") +
labs(title = "Minimizing BIC")
Joining, by = "capacity"
|================================================ | 79% ~1 s remaining
|================================================= | 81% ~0 s remaining
|================================================== | 83% ~0 s remaining
|==================================================== | 86% ~0 s remaining
|====================================================== | 89% ~0 s remaining
|======================================================= | 92% ~0 s remaining
|========================================================= | 94% ~0 s remaining
|=========================================================== | 97% ~0 s remaining
|=============================================================|100% ~0 s remaining
Joining, by = c("capacity", "factor", "order")
reten_all_k <- reten_fun(d_all, rot_type = "oblimin")
print(paste("Preset criteria suggest retaining", reten_all_k, "factors"))
[1] "Preset criteria suggest retaining 2 factors"
efa_all_k <- fa(d_all, reten_all_k, "oblimin")
heatmap_fun(efa_all_k) +
labs(title = "Preset factor retention criteria\n(Weisman et al., 2017)") #+
Joining, by = "capacity"
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 0.5, xmax = 1.5, ymin = 0.5, ymax = 30.5) +
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 1.5, xmax = 2.5, ymin = 33.5, ymax = 60.5) +
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 1.5, xmax = 2.5, ymin = 30.5, ymax = 31.5)
scoresplot_fun(efa_all_k, target = "all") +
labs(title = "Preset factor retention criteria (Weisman et al., 2017)")
itemsplot_fun(efa_all_k, target = "all") +
labs(title = "Preset factor retention criteria (Weisman et al., 2017)")
Joining, by = "capacity"
Joining, by = c("capacity", "factor", "order")
What happens if we limit ourselves to assessments of newborns’ mental capacities?
reten_00mo_PA <- fa.parallel(d_00mo, plot = F); reten_00mo_PA
Parallel analysis suggests that the number of factors = 4 and the number of components = 3
Call: fa.parallel(x = d_00mo, plot = F)
Parallel analysis suggests that the number of factors = 4 and the number of components = 3
Eigen Values of
Original factors Simulated data Original components simulated data
1 26.10 1.12 26.61 2.07
2 9.07 0.97 9.73 1.96
3 1.48 0.89 2.14 1.87
4 1.13 0.82 1.67 1.81
reten_00mo_par <- reten_00mo_PA$nfact
efa_00mo_par <- fa(d_00mo, reten_00mo_par, "oblimin")
Loading required namespace: GPArotation
A loading greater than abs(1) was detected. Examine the loadings carefully.
heatmap_fun(efa_00mo_par) +
labs(title = "Parallel Analysis") #+
Joining, by = "capacity"
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 0.5, xmax = 1.5, ymin = 1.5, ymax = 21.5) +
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 1.5, xmax = 2.5, ymin = 25.5, ymax = 33.5) +
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 1.5, xmax = 2.5, ymin = 22.5, ymax = 24.5) +
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 2.5, xmax = 3.5, ymin = 33.5, ymax = 34.5) +
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 2.5, xmax = 3.5, ymin = 36.5, ymax = 54.5) +
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 3.5, xmax = 4.5, ymin = 54.5, ymax = 55.5) +
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 3.5, xmax = 4.5, ymin = 56.5, ymax = 57.5) +
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 3.5, xmax = 4.5, ymin = 58.5, ymax = 60.5)
itemsplot_fun(efa_00mo_par, target = "newborns") +
labs(title = "Parallel Analysis")
Joining, by = "capacity"
Joining, by = c("capacity", "factor", "order")
reten_00mo_vss <- VSS(d_00mo, plot = F); reten_00mo_vss
Very Simple Structure
Call: vss(x = x, n = n, rotate = rotate, diagonal = diagonal, fm = fm,
n.obs = n.obs, plot = plot, title = title, use = use, cor = cor)
VSS complexity 1 achieves a maximimum of 0.86 with 1 factors
VSS complexity 2 achieves a maximimum of 0.98 with 2 factors
The Velicer MAP achieves a minimum of 0.01 with 4 factors
BIC achieves a minimum of -6008.43 with 4 factors
Sample Size adjusted BIC achieves a minimum of -1263.12 with 8 factors
Statistics by number of factors
vss1 vss2 map dof chisq prob sqresid fit RMSEA BIC SABIC complex eChisq
1 0.86 0.00 0.0767 1710 8498 0.0e+00 112.9 0.86 0.127 -1094 4328 1.0 25186
2 0.66 0.98 0.0161 1651 3875 7.9e-180 18.2 0.98 0.076 -5386 -151 1.4 1988
3 0.57 0.94 0.0095 1593 3033 1.6e-92 13.7 0.98 0.063 -5902 -851 1.6 1140
4 0.45 0.78 0.0080 1536 2608 1.4e-58 11.1 0.99 0.056 -6008 -1138 2.1 719
5 0.48 0.80 0.0080 1480 2420 2.0e-48 10.1 0.99 0.054 -5882 -1190 2.1 610
6 0.48 0.79 0.0081 1425 2253 2.2e-40 9.3 0.99 0.052 -5740 -1222 2.2 520
7 0.46 0.78 0.0084 1371 2116 7.0e-35 8.5 0.99 0.051 -5574 -1227 2.3 447
8 0.47 0.79 0.0084 1318 1951 2.0e-27 7.8 0.99 0.048 -5442 -1263 2.3 377
SRMR eCRMS eBIC
1 0.161 0.164 15594
2 0.045 0.047 -7273
3 0.034 0.036 -7795
4 0.027 0.029 -7897
5 0.025 0.027 -7692
6 0.023 0.026 -7474
7 0.021 0.024 -7244
8 0.020 0.023 -7016
reten_00mo_bic <- data.frame(reten_00mo_vss$vss.stats %>%
rownames_to_column("nfactors") %>%
top_n(-1, BIC) %>%
select(nfactors))$nfactors %>% as.numeric()
efa_00mo_bic <- fa(d_00mo, reten_00mo_bic, "oblimin")
A loading greater than abs(1) was detected. Examine the loadings carefully.
heatmap_fun(efa_00mo_bic) +
labs(title = "Minimizing BIC") #+
Joining, by = "capacity"
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 0.5, xmax = 1.5, ymin = 0.5, ymax = 21.5) +
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 1.5, xmax = 2.5, ymin = 22.5, ymax = 32.5) +
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 2.5, xmax = 3.5, ymin = 32.5, ymax = 48.5) +
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 3.5, xmax = 4.5, ymin = 49.5, ymax = 50.5) +
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 3.5, xmax = 4.5, ymin = 51.5, ymax = 52.5) +
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 4.5, xmax = 5.5, ymin = 52.5, ymax = 54.5) +
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 4.5, xmax = 5.5, ymin = 56.5, ymax = 60.5)
itemsplot_fun(efa_00mo_bic, target = "newborns") +
labs(title = "Minimizing BIC")
Joining, by = "capacity"
Joining, by = c("capacity", "factor", "order")
reten_00mo_k <- reten_fun(d_00mo, rot_type = "oblimin")
print(paste("Preset criteria suggest retaining", reten_00mo_k, "factors"))
[1] "Preset criteria suggest retaining 2 factors"
efa_00mo_k <- fa(d_00mo, reten_00mo_k, "oblimin")
heatmap_fun(efa_00mo_k) +
labs(title = "Preset factor retention criteria\n(Weisman et al., 2017)") #+
Joining, by = "capacity"
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 0.5, xmax = 1.5, ymin = 0.5, ymax = 30.5) +
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 1.5, xmax = 2.5, ymin = 33.5, ymax = 60.5) +
# annotate("rect", fill = NA, color = "black", size = 0.5,
# xmin = 1.5, xmax = 2.5, ymin = 30.5, ymax = 31.5)
itemsplot_fun(efa_00mo_k, target = "newborns") +
labs(title = "Preset factor retention criteria\n(Weisman et al., 2017)")
Joining, by = "capacity"
Joining, by = c("capacity", "factor", "order")
What happens if we limit ourselves to assessments of 9-month-olds’ mental capacities?
reten_09mo_PA <- fa.parallel(d_09mo, plot = F); reten_09mo_PA
Parallel analysis suggests that the number of factors = 4 and the number of components = 2
Call: fa.parallel(x = d_09mo, plot = F)
Parallel analysis suggests that the number of factors = 4 and the number of components = 2
Eigen Values of
Original factors Simulated data Original components simulated data
1 26.79 1.12 27.31 2.07
2 9.41 0.98 10.07 1.97
3 1.30 0.90 1.83 1.88
4 1.03 0.84 1.60 1.82
reten_09mo_par <- reten_09mo_PA$nfact
efa_09mo_par <- fa(d_09mo, reten_09mo_par, "oblimin")
heatmap_fun(efa_09mo_par) +
labs(title = "Parallel Analysis")
Joining, by = "capacity"
itemsplot_fun(efa_09mo_par, target = "9-month-olds") +
labs(title = "Parallel Analysis")
Joining, by = "capacity"
Joining, by = c("capacity", "factor", "order")
reten_09mo_vss <- VSS(d_09mo, plot = F); reten_09mo_vss
Very Simple Structure
Call: vss(x = x, n = n, rotate = rotate, diagonal = diagonal, fm = fm,
n.obs = n.obs, plot = plot, title = title, use = use, cor = cor)
VSS complexity 1 achieves a maximimum of 0.86 with 1 factors
VSS complexity 2 achieves a maximimum of 0.98 with 2 factors
The Velicer MAP achieves a minimum of 0.01 with 6 factors
BIC achieves a minimum of -5903.43 with 4 factors
Sample Size adjusted BIC achieves a minimum of -1255.75 with 8 factors
Statistics by number of factors
vss1 vss2 map dof chisq prob sqresid fit RMSEA BIC SABIC complex eChisq
1 0.86 0.00 0.0715 1710 8151 0.0e+00 117.1 0.86 0.124 -1441 3981 1.0 26699
2 0.71 0.98 0.0136 1651 3905 1.6e-183 15.7 0.98 0.076 -5356 -122 1.4 1726
3 0.54 0.85 0.0105 1593 3224 6.7e-113 12.5 0.99 0.067 -5712 -661 1.7 1135
4 0.46 0.77 0.0085 1536 2713 2.9e-68 10.0 0.99 0.059 -5903 -1033 2.0 718
5 0.46 0.78 0.0081 1480 2449 6.1e-51 8.9 0.99 0.055 -5853 -1160 2.1 578
6 0.47 0.76 0.0081 1425 2264 3.2e-41 8.1 0.99 0.052 -5730 -1212 2.2 485
7 0.47 0.76 0.0082 1371 2112 1.5e-34 7.5 0.99 0.051 -5578 -1231 2.3 419
8 0.47 0.76 0.0085 1318 1958 5.9e-28 7.0 0.99 0.049 -5435 -1256 2.3 367
SRMR eCRMS eBIC
1 0.166 0.169 17107
2 0.042 0.044 -7536
3 0.034 0.036 -7801
4 0.027 0.029 -7898
5 0.024 0.027 -7724
6 0.022 0.025 -7508
7 0.021 0.024 -7271
8 0.019 0.023 -7027
reten_09mo_bic <- data.frame(reten_09mo_vss$vss.stats %>%
rownames_to_column("nfactors") %>%
top_n(-1, BIC) %>%
select(nfactors))$nfactors %>% as.numeric()
efa_09mo_bic <- fa(d_09mo, reten_09mo_bic, "oblimin")
heatmap_fun(efa_09mo_bic) +
labs(title = "Minimizing BIC")
Joining, by = "capacity"
itemsplot_fun(efa_09mo_bic, target = "9-month-olds") +
labs(title = "Minimizing BIC")
Joining, by = "capacity"
Joining, by = c("capacity", "factor", "order")
reten_09mo_k <- reten_fun(d_09mo, rot_type = "oblimin")
print(paste("Preset criteria suggest retaining", reten_09mo_k, "factors"))
[1] "Preset criteria suggest retaining 2 factors"
efa_09mo_k <- fa(d_09mo, reten_09mo_k, "oblimin")
heatmap_fun(efa_09mo_k) +
labs(title = "Preset factor retention criteria\n(Weisman et al., 2017)")
Joining, by = "capacity"
itemsplot_fun(efa_09mo_k, target = "9-month-olds") +
labs(title = "Preset factor retention criteria\n(Weisman et al., 2017)")
Joining, by = "capacity"
Joining, by = c("capacity", "factor", "order")
What happens if we limit ourselves to assessments of 5-year-olds’ mental capacities?
reten_60mo_PA <- fa.parallel(d_60mo, plot = F); reten_60mo_PA
Parallel analysis suggests that the number of factors = 3 and the number of components = 2
Call: fa.parallel(x = d_60mo, plot = F)
Parallel analysis suggests that the number of factors = 3 and the number of components = 2
Eigen Values of
Original factors Simulated data Original components simulated data
1 35.15 1.15 35.53 2.09
2 5.65 0.99 6.20 1.97
3 1.24 0.91 1.65 1.89
reten_60mo_par <- reten_60mo_PA$nfact
efa_60mo_par <- fa(d_60mo, reten_60mo_par, "oblimin")
A loading greater than abs(1) was detected. Examine the loadings carefully.
heatmap_fun(efa_60mo_par) +
labs(title = "Parallel Analysis")
Joining, by = "capacity"
itemsplot_fun(efa_60mo_par, target = "5-year-olds") +
labs(title = "Parallel Analysis")
Joining, by = "capacity"
Joining, by = c("capacity", "factor", "order")
reten_60mo_vss <- VSS(d_60mo, plot = F); reten_60mo_vss
Very Simple Structure
Call: vss(x = x, n = n, rotate = rotate, diagonal = diagonal, fm = fm,
n.obs = n.obs, plot = plot, title = title, use = use, cor = cor)
VSS complexity 1 achieves a maximimum of 0.96 with 1 factors
VSS complexity 2 achieves a maximimum of 0.99 with 2 factors
The Velicer MAP achieves a minimum of 0.01 with 5 factors
BIC achieves a minimum of -4908.09 with 5 factors
Sample Size adjusted BIC achieves a minimum of -428.6 with 8 factors
Statistics by number of factors
vss1 vss2 map dof chisq prob sqresid fit RMSEA BIC SABIC complex eChisq
1 0.96 0.00 0.050 1710 7767 0.0e+00 49.4 0.96 0.120 -1825 3597 1.0 10319
2 0.70 0.99 0.015 1651 4838 1.7e-309 11.0 0.99 0.090 -4423 812 1.4 1253
3 0.65 0.91 0.012 1593 4122 4.9e-223 8.3 0.99 0.082 -4814 237 1.7 765
4 0.63 0.89 0.011 1536 3726 1.1e-182 7.2 0.99 0.078 -4890 -20 1.8 597
5 0.63 0.89 0.010 1480 3394 1.5e-151 6.4 1.00 0.075 -4908 -215 1.9 481
6 0.63 0.89 0.010 1425 3192 9.1e-137 5.7 1.00 0.073 -4802 -283 1.9 409
7 0.63 0.87 0.010 1371 2974 3.7e-120 5.3 1.00 0.072 -4717 -370 2.0 356
8 0.63 0.87 0.011 1318 2786 4.3e-107 4.8 1.00 0.070 -4608 -429 2.0 308
SRMR eCRMS eBIC
1 0.103 0.105 726
2 0.036 0.037 -8008
3 0.028 0.030 -8171
4 0.025 0.027 -8019
5 0.022 0.024 -7821
6 0.021 0.023 -7585
7 0.019 0.022 -7335
8 0.018 0.021 -7085
reten_60mo_bic <- data.frame(reten_60mo_vss$vss.stats %>%
rownames_to_column("nfactors") %>%
top_n(-1, BIC) %>%
select(nfactors))$nfactors %>% as.numeric()
efa_60mo_bic <- fa(d_60mo, reten_60mo_bic, "oblimin")
heatmap_fun(efa_60mo_bic) +
labs(title = "Minimizing BIC")
Joining, by = "capacity"
itemsplot_fun(efa_60mo_bic, target = "5-year-olds") +
labs(title = "Minimizing BIC")
Joining, by = "capacity"
Joining, by = c("capacity", "factor", "order")
reten_60mo_k <- reten_fun(d_60mo, rot_type = "oblimin")
print(paste("Preset criteria suggest retaining", reten_60mo_k, "factors"))
[1] "Preset criteria suggest retaining 2 factors"
efa_60mo_k <- fa(d_60mo, reten_60mo_k, "oblimin")
heatmap_fun(efa_60mo_k) +
labs(title = "Preset factor retention criteria\n(Weisman et al., 2017)")
Joining, by = "capacity"
itemsplot_fun(efa_60mo_k, target = "5-year-olds") +
labs(title = "Preset factor retention criteria\n(Weisman et al., 2017)")
Joining, by = "capacity"
Joining, by = c("capacity", "factor", "order")
ggplot(d_demo, aes(x = Duration/60)) +
geom_histogram(binwidth = 2) +
geom_vline(xintercept = median(d_demo$Duration/60), color = "blue", lty = 2) +
scale_x_continuous(breaks = seq(0, 10000, 4)) +
labs(title = "Duration of study (according to Qualtrics)",
subtitle = "Blue dotted line marks median",
x = "Duration (in minutes)",
y = "Number of participants")
ggplot(d_demo, aes(x = Age)) +
geom_histogram(binwidth = 2) +
geom_vline(xintercept = median(d_demo$Age), color = "blue", lty = 2) +
scale_x_continuous(breaks = seq(0, 10000, 4)) +
labs(title = "Particpiant age (self-reported)",
subtitle = "Blue dotted line marks median",
x = "Age (in years)",
y = "Number of participants")
ggplot(d_demo, aes(x = GenderSex)) +
geom_bar() +
labs(title = "Particpiant gender/sex (self-reported)",
x = "Gender/sex",
y = "Number of participants")
ggplot(d_demo, aes(x = gsub('(.{1,30})(\\s|$)', '\\1\n', RaceEthnicity_collapse))) +
geom_bar() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
labs(title = "Particpiant race/ethnicity (self-reported)",
x = "Race/ethnicity",
y = "Number of participants")
ggplot(d_demo, aes(x = FirstLang)) +
geom_bar() +
labs(title = "Particpiant first language (self-reported)",
x = "Language",
y = "Number of participants")
ggplot(d_demo, aes(x = factor(Education,
levels = levels(d$Education),
labels = gsub('(.{1,30})(\\s|$)', '\\1\n',
levels(d$Education))))) +
geom_bar() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
labs(title = "Particpiant educational attainment (self-reported)",
x = "Highest level of education completed",
y = "Number of participants")
ggplot(d_demo, aes(x = Income)) +
geom_bar() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
labs(title = "Particpiant household income (self-reported)",
x = "Annual household income",
y = "Number of participants")
ggplot(d_demo, aes(x = HouseholdSize)) +
geom_histogram(binwidth = 1) +
geom_vline(xintercept = median(d_demo$HouseholdSize), color = "blue", lty = 2) +
scale_x_continuous(breaks = seq(0, 10000, 1)) +
labs(title = "Particpiant household size (self-reported)",
subtitle = "Blue dotted line marks median",
x = "Number of people in household (adults and children)",
y = "Number of participants")
ggplot(d_demo, aes(x = MaritalStatus)) +
geom_bar() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
labs(title = "Particpiant marital status (self-reported)",
x = "Marital status",
y = "Number of participants")
ggplot(d_demo, aes(x = Parent)) +
geom_bar() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
labs(title = "Particpiant parent status (self-reported)",
subtitle = "'NA' indicates response of 'Prefer not to say'",
x = "Parent status",
y = "Number of participants")
ggplot(d_demo %>% filter(Parent == "Yes"), aes(x = ChildrenNumber)) +
geom_histogram(binwidth = 1) +
geom_vline(xintercept = median(d_demo[d_demo$Parent == "Yes",]$ChildrenNumber, na.rm = T),
color = "blue", lty = 2) +
scale_x_continuous(breaks = seq(0, 10000, 1)) +
labs(title = "Number of children among parents (self-reported)",
subtitle = "Blue dotted line marks median",
x = "Number of children (among parents)",
y = "Number of participants")
ggplot(d_demo %>% filter(Parent == "Yes"),
aes(x = factor(ChildrenOldestAge_collapse,
levels = levels(d_demo$ChildrenOldestAge_collapse),
labels = gsub('(.{1,30})(\\s|$)', '\\1\n',
levels(d_demo$ChildrenOldestAge_collapse))))) +
geom_bar() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
labs(title = "Age of oldest child among parents (self-reported)",
x = "Age of child in years (among parents)",
y = "Number of participants")
ggplot(d_demo %>% filter(Parent == "Yes"),
aes(x = factor(ChildrenYoungestAge_collapse,
levels = levels(d_demo$ChildrenYoungestAge_collapse),
labels = gsub('(.{1,30})(\\s|$)', '\\1\n',
levels(d_demo$ChildrenYoungestAge_collapse))))) +
geom_bar() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)) +
labs(title = "Age of youngest child among parents (self-reported)",
x = "Age of child in years (among parents)",
y = "Number of participants")